Temporary decline in CO₂ due to COVID-19
Stacked area plot of drop in CO₂ emissions
Today we will work on the following graph from the article Emissions Are Surging Back as Countries and States Reopen -

I downloaded the dataset as an Excel file and saved data for individual countries as csv files.
import altair as alt
import pandas as pd
#hide_output
# NOTE(review): the line above looks like a notebook/blog cell directive, so it
# is kept as the first line of the cell.
# Pass actions=False to the active renderer's embed options (presumably this
# turns off the vega-embed action menu on rendered charts — confirm in the
# Altair renderer docs).
alt.renderers.set_embed_options(actions=False)
# Load one CSV export per region. The first data row of every file is skipped
# with iloc[1:] (presumably a units/description row — confirm against the CSVs).
# NOTE(review): the India file is read with the default separator while the
# other files use ';' — verify this matches how the files were saved.
ind = pd.read_csv('ind_co2_em.csv').iloc[1:]
chn = pd.read_csv('china_co2_em.csv', sep=';').iloc[1:]
us = pd.read_csv('us_co2_em.csv', sep=';').iloc[1:]
euuk = pd.read_csv('euuk_co2_em.csv', sep=';').iloc[1:]
globl = pd.read_csv('global_co2_em.csv', sep=';').iloc[1:]

# Stack every region (including the global series) into one long frame.
data = pd.concat([chn, ind, euuk, us, globl])

# Parse the day/month/year strings and make the per-sector *_MED columns numeric.
data['DATE'] = pd.to_datetime(data['DATE'], format='%d/%m/%Y')
sector_median_cols = [
    'PWR_CO2_MED',
    'IND_CO2_MED',
    'TRS_CO2_MED',
    'PUB_CO2_MED',
    'RES_CO2_MED',
    'AVI_CO2_MED',
]
data[sector_median_cols] = data[sector_median_cols].apply(pd.to_numeric)
data.head()
If you observe the chart closely you will realize that the graph is stacked, so that is what we will do right away using altair's area chart -
# First attempt: area chart of TOTAL_CO2_MED over DATE, colored by region name.
first_stack = alt.Chart(data).mark_area().encode(
    x='DATE:T',
    y='TOTAL_CO2_MED:Q',
    color='REGION_NAME:N',
)
first_stack.properties(width=800, height=400)
This is close but not exactly like what we saw in the article. If you look closely you'd realize that the order of countries is different. So we will try to follow the same order using the order encoding field.
# Vega expression that looks up each row's stacking position from its region code.
stack_order_expr = "{'CHN': 0, 'IND': 1, 'EUandUK': 2, 'USA': 3, 'GLOBAL': 4}[datum.REGION_CODE]"

# Same area chart as before, but colored by region code and stacked in the
# order given by the calculated `order` field.
ordered_stack = (
    alt.Chart(data)
    .mark_area()
    .transform_calculate(order=stack_order_expr)
    .encode(
        x='DATE:T',
        y='TOTAL_CO2_MED:Q',
        color='REGION_CODE:N',
        order=alt.Order('order:O'),
    )
)
ordered_stack.properties(width=800, height=400)
This is exactly like it. Now let's change the colors. I probably would have done it the following way -
# Vega expression that looks up each row's stacking position from its region code.
stack_order_expr = "{'CHN': 0, 'IND': 1, 'EUandUK': 2, 'USA': 3, 'GLOBAL': 4}[datum.REGION_CODE]"

# Explicit color scale: one grey per region code, light to dark.
grey_scale = alt.Scale(
    domain=['CHN', 'IND', 'EUandUK', 'USA', 'GLOBAL'],
    range=["#c9c9c9", "#aaaaaa", "#888888", "#686868", "#454545"],
)

grey_stack = (
    alt.Chart(data)
    .mark_area()
    .transform_calculate(order=stack_order_expr)
    .encode(
        x='DATE:T',
        y='TOTAL_CO2_MED:Q',
        color=alt.Color('REGION_CODE:N', scale=grey_scale),
        order=alt.Order('order:O'),
    )
)
grey_stack.properties(width=800, height=400)
To make it just like the graph in the article, we will pick colors from here https://imagecolorpicker.com/en/
# Vega expression that looks up each row's stacking position from its region code.
stack_order_expr = "{'CHN': 0, 'IND': 1, 'EUandUK': 2, 'USA': 3, 'GLOBAL': 4}[datum.REGION_CODE]"

# Explicit color scale using the hex values sampled from the article's chart.
article_scale = alt.Scale(
    domain=['CHN', 'IND', 'EUandUK', 'USA', 'GLOBAL'],
    range=["#fde9d1", "#fcd08b", "#f9b382", "#e38875", "#ac7066"],
)

article_stack = (
    alt.Chart(data)
    .mark_area()
    .transform_calculate(order=stack_order_expr)
    .encode(
        x='DATE:T',
        y='TOTAL_CO2_MED:Q',
        color=alt.Color('REGION_CODE:N', scale=article_scale),
        order=alt.Order('order:O'),
    )
)
article_stack.properties(width=800, height=400)
If you look closely, you will notice that we capture the trend perfectly; however, the area for "REST of the world" is much larger than it should be.
That is because it duplicates the data from the US, EU, India, and China. So we need to subtract the contributions of these regions from the global data and then stack them.
# Convert each regional frame in place: parse DATE (day/month/year) and make
# every column from position 5 onwards numeric.
for region_frame in (chn, ind, us, euuk, globl):
    region_frame['DATE'] = pd.to_datetime(region_frame['DATE'], format='%d/%m/%Y')
    measure_cols = list(region_frame.columns)[5:]
    region_frame[measure_cols] = region_frame[measure_cols].apply(pd.to_numeric)

# Sum the four named regions column by column; pandas aligns the frames on
# their row index and column labels.
countries_sum = ind.iloc[:, 5:] + chn.iloc[:, 5:] + us.iloc[:, 5:] + euuk.iloc[:, 5:]

# Whatever is left of the global figures after removing the named regions,
# tagged with its own region identifiers and the global time axis.
rest = (globl.iloc[:, 5:] - countries_sum).assign(
    REGION_ID=99,
    REGION_CODE='RST',
    REGION_NAME='REST',
    TIME_POINT=globl['TIME_POINT'],
    DATE=globl['DATE'],
)

# Rebuild the plotting frame with the remainder in place of the global series.
data = pd.concat([chn, ind, euuk, us, rest])
# Vega expression that looks up each row's stacking position from its region code.
stack_order_expr = "{'CHN': 0, 'IND': 1, 'EUandUK': 2, 'USA': 3, 'RST': 4}[datum.REGION_CODE]"

# Article palette, with the 'RST' remainder as the last (darkest) band.
article_scale = alt.Scale(
    domain=['CHN', 'IND', 'EUandUK', 'USA', 'RST'],
    range=["#fde9d1", "#fcd08b", "#f9b382", "#e38875", "#ac7066"],
)

rest_stack = (
    alt.Chart(data)
    .mark_area()
    .transform_calculate(order=stack_order_expr)
    .encode(
        # Axis labels use the "%B" date format.
        x=alt.X('DATE:T', axis=alt.Axis(format="%B")),
        y='TOTAL_CO2_MED:Q',
        color=alt.Color('REGION_CODE:N', scale=article_scale),
        order=alt.Order('order:O'),
    )
    .properties(width=800, height=400)
)
# Remove the view border and the grid lines.
rest_stack.configure_view(strokeWidth=0).configure_axis(grid=False)
This looks exactly like the chart in the article. Right now there is no way to properly place text inside the corresponding area of a stacked chart, but let's try it anyway, so that we can fix this code as soon as the option becomes available in Vega-Lite.
# Vega expression that looks up each row's stacking position from its region code.
stack_order_expr = "{'CHN': 0, 'IND': 1, 'EUandUK': 2, 'USA': 3, 'RST': 4}[datum.REGION_CODE]"
article_scale = alt.Scale(
    domain=['CHN', 'IND', 'EUandUK', 'USA', 'RST'],
    range=["#fde9d1", "#fcd08b", "#f9b382", "#e38875", "#ac7066"],
)

# Stacked area chart, one band per region code.
base = (
    alt.Chart(data)
    .mark_area()
    .transform_calculate(order=stack_order_expr)
    .encode(
        x=alt.X('DATE:T', axis=alt.Axis(format="%B")),
        y='TOTAL_CO2_MED:Q',
        color=alt.Color('REGION_CODE:N', scale=article_scale),
        order=alt.Order('order:O'),
    )
    .properties(width=800, height=400)
)

# Text layer showing REGION_NAME, positioned at the median DATE and the
# minimum TOTAL_CO2_MED.
t = alt.Chart(data).mark_text().encode(
    x='median(DATE):T',
    y=alt.Y('TOTAL_CO2_MED:Q', aggregate='min'),
    text='REGION_NAME:N',
)

# Overlay the labels on the areas, without view border or grid lines.
alt.layer(base, t).configure_view(strokeWidth=0).configure_axis(grid=False)
While we are at it we can also make the following graph of global emissions by sector -

The main idea behind these plots is layering an area plot on top of a line chart with the area shaded by the LOW and HIGH columns -
# Line through the TRS_CO2_MED values of the global series.
line = alt.Chart(globl).mark_line().encode(x=alt.X('DATE:T'), y='TRS_CO2_MED:Q')

# Translucent area spanning TRS_CO2_LOW to TRS_CO2_HIGH on the same data.
band = alt.Chart(globl).mark_area(opacity=0.3).encode(
    x=alt.X('DATE:T'),
    y='TRS_CO2_LOW:Q',
    y2=alt.Y2('TRS_CO2_HIGH:Q'),
)

# Layer the band over the line.
alt.layer(line, band)
Now we are going to change the data so that we can facet it properly like in the article's chart -
# Remove the region identifier columns and the TOTAL_* columns from the global
# frame in place, keeping the remaining columns for the faceted plot.
region_and_total_cols = [
    'REGION_ID',
    'REGION_CODE',
    'REGION_NAME',
    'TOTAL_CO2_MED',
    'TOTAL_CO2_HIGH',
    'TOTAL_CO2_LOW',
]
globl.drop(columns=region_and_total_cols, inplace=True)
globl.head()
# Reshape the global frame to long form three times — once each for the *_MED,
# *_HIGH and *_LOW columns — keeping TIME_POINT and DATE as identifiers.
id_cols = ['TIME_POINT', 'DATE']
median_long = pd.melt(
    globl.filter(regex='_MED|TIME_POINT|DATE'),
    id_vars=id_cols, var_name='MED_KEY', value_name='MED_VALUES',
)
high_long = pd.melt(
    globl.filter(regex='_HIGH|TIME_POINT|DATE'),
    id_vars=id_cols, var_name='HIGH_KEY', value_name='HIGH_VALUES',
)
low_long = pd.melt(
    globl.filter(regex='_LOW|TIME_POINT|DATE'),
    id_vars=id_cols, var_name='LOW_KEY', value_name='LOW_VALUES',
)

# Place the three long frames side by side. NOTE(review): this assumes the
# *_MED, *_HIGH and *_LOW columns list the sectors in the same order, so the
# melted rows line up — confirm against the CSV header.
data = pd.concat([median_long, high_long, low_long], axis=1)

# Each melt contributes its own TIME_POINT/DATE pair; keep only the first
# occurrence of every column name. Unlike a transpose/drop_duplicates/transpose
# round trip, this does not push the frame through an object-dtype transpose,
# so DATE keeps its datetime dtype and the value columns stay numeric.
data = data.loc[:, ~data.columns.duplicated()]
data
# Half-transparent band between LOW_VALUES and HIGH_VALUES; the data is
# supplied later, when the layers are combined.
a = alt.Chart().mark_area(opacity=0.5).encode(
    x=alt.X('DATE:T', axis=alt.Axis(format="%b")),
    y=alt.Y('LOW_VALUES:Q'),
    y2=alt.Y2('HIGH_VALUES:Q'),
)

# Line through MED_VALUES.
l = alt.Chart().mark_line().encode(x=alt.X('DATE:T'), y=alt.Y('MED_VALUES:Q'))

# One panel per LOW_KEY value, in this fixed order.
sector_order = [
    'TRS_CO2_LOW',
    'IND_CO2_LOW',
    'PWR_CO2_LOW',
    'AVI_CO2_LOW',
    'PUB_CO2_LOW',
    'RES_CO2_LOW',
]
sector_facet = alt.Column(
    'LOW_KEY',
    title="Change in global CO\u2082 emissions by sector",
    sort=sector_order,
)

# Layer band and line on the melted data, facet into three columns, then
# apply the axis, view and header configuration.
(
    alt.layer(a, l, data=data)
    .facet(sector_facet, columns=3)
    .configure_axis(grid=False, title=None)
    .configure_axisX(orient='top', offset=-27)
    .configure_view(strokeWidth=0)
    .resolve_scale(x='independent')
    .configure_header(titleFontSize=20, labelFontSize=14)
)